import urllib.request
import urllib.parse
import json
import urllib.error

"""
采集一些需要登录才能获取到的数据，如何绕过登录获取数据？---->静态cookie
个人信息页面是utf-8编码的，但是爬虫的时候报了unicode解码错误，是因为没有进入到个人信息界面，拦截回了登陆页面，而登录页面没有使用utf-8进行编码
一定要注意将post请求的请求体data编码encode(),和response获得的content解码decode()的时候的语言要与原网页的一致，查看网页源代码<head>中有记录
"""

url = "https://webvpn.neu.edu.cn/"

# Headers copied from a logged-in browser session. The static 'Cookie' value
# is what bypasses the login page: the server recognizes the session ticket,
# so no credentials are posted here. If the ticket expires, the server
# silently redirects to the login page instead of the target page.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # 'Accept-Encoding' is deliberately omitted: advertising gzip/br would
    # make the server return compressed bytes, which .decode("utf-8") below
    # cannot handle without an explicit decompression step.
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': 'wengine_vpn_ticketwebvpn_neu_edu_cn=378ad3e640a0a2c7; route=8768cab8c7e7ee1c6799ad807f94da0a; show_vpn=0; heartbeat=1; show_faq=0; refresh=1',
    'Host': 'webvpn.neu.edu.cn',
    'sec-ch-ua': '"Chromium";v="9", "Not?A_Brand";v="8"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.5211 SLBChan/105',
}

my_request = urllib.request.Request(url=url, headers=headers)
try:
    # The context manager guarantees the HTTP response is closed even on
    # error (the original left it open); the timeout keeps the script from
    # hanging forever on an unreachable VPN gateway.
    with urllib.request.urlopen(my_request, timeout=15) as response:
        # The target page is UTF-8. A UnicodeDecodeError here usually means
        # the cookie expired and the request was bounced back to the login
        # page, which is not UTF-8-encoded (see the module note above).
        content = response.read().decode("utf-8")
    print(content)
except urllib.error.URLError as e:
    # urllib.error is already imported at the top of the file; report
    # network/HTTP failures instead of dying with a raw traceback.
    print(f"request failed: {e}")
